celfiles<-list.celfiles("./cel_files/",full.names=TRUE)
data<-read.celfiles(celfiles)## Reading in : ./cel_files//A1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//F10_(miRNA-4_0).CEL
## Reading in : ./cel_files//F11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L7_(miRNA-4_0).CEL
## Reading in : ./cel_files//P1_(miRNA-4_0).CEL
## Reading in : ./cel_files//P2_(miRNA-4_0).CEL
## Reading in : ./cel_files//P3_(miRNA-4_0).CEL
# Anotació fenotipica
pheno <- read.csv(file="./cel_files/Registremostres.csv",sep=";")
pheno<-pheno %>%
mutate(NaCl_envellides=ifelse(NaCl_envellides =="si"&PBS_NaCl=="si",1,
ifelse(NaCl_envellides =="si"&PBS_NaCl=="no",2,"no")))datatable_jm<-function(x,column=NULL){
# if(is.na(column)){column<-0}
datatable(
x,
extensions = 'Buttons',
filter = list(
position = 'top', clear = T
),
options = list(dom = 'Blfrtip',buttons = list(list(extend = 'colvis')),
buttons = list('copy', 'print',
list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download')),
columnDefs = list(list(visible=FALSE, targets=column))))}pheno$nom<-gsub("_[(]miRNA-4_0).CEL","",pheno$Nombre.Cel.file)
rownames(pheno)<-pheno$Nombre.Cel.file
# phenoData(data) <- AnnotatedDataFrame(pheno)
pheno_ano<-AnnotatedDataFrame(pheno)
data<-read.celfiles(celfiles, phenoData = pheno_ano)## Reading in : ./cel_files//A1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//A3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C1H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C2H_(miRNA-4_0).CEL
## Reading in : ./cel_files//C3H_(miRNA-4_0).CEL
## Reading in : ./cel_files//F10_(miRNA-4_0).CEL
## Reading in : ./cel_files//F11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K11_(miRNA-4_0).CEL
## Reading in : ./cel_files//K13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L13_(miRNA-4_0).CEL
## Reading in : ./cel_files//L7_(miRNA-4_0).CEL
## Reading in : ./cel_files//P1_(miRNA-4_0).CEL
## Reading in : ./cel_files//P2_(miRNA-4_0).CEL
## Reading in : ./cel_files//P3_(miRNA-4_0).CEL
# RMA
data_rma<-rma(data)## Background correcting
## Normalizing
## Calculating Expression
library(dplyr)
df <- read.csv("/home/SHARED/annotation_affymetrix/miRNA-4_0-st-v1.annotations.20160922.csv",comment.char = "#")
df<-
df %>%
group_by(Accession) %>%
mutate(gene=paste(Accession, collapse="|"))
feat<-df[match(rownames(data_rma@featureData@data),df$Probe.Set.Name),]
feat<-AnnotatedDataFrame(feat)
rownames(feat@data)<-
rownames(data_rma@featureData)
data_rma@featureData<-feat
dup.ids <- feat@data$Accession[duplicated(feat@data$Accession)] %>%
unique %>%
sort
data_rma<-
data_rma[,pData(data_rma)$temps!=2|is.na(pData(data_rma)$temps)]
data_rma<-
data_rma[data_rma@featureData@data$Species.Scientific.Name=="Homo sapiens",]
data_rma<-data_rma[data_rma@featureData@data$Sequence.Type=="miRNA",]
exp_rma <- exprs(data_rma)# Filtre
comparativa<-"NaCl_envellides"
data_rma<-data_rma[,data_rma@phenoData@data[,comparativa]!="no"]
# mostres:
# mostres<-c("C2H","C1H","C3H","A1H","A2H","A3H")
# data_rma_f<-data_rma[,data_rma@phenoData@data$nom%in%mostres]
g1<-levels(as.factor(data_rma@phenoData@data[,comparativa]))[1]
g2<-levels(as.factor(data_rma@phenoData@data[,comparativa]))[2]
data_rma@phenoData@data[,comparativa]<-gsub(g1,"NaCl",data_rma@phenoData@data[,comparativa])
data_rma@phenoData@data[,comparativa]<-gsub(g2,"envellides",data_rma@phenoData@data[,comparativa])
g1<-"NaCl"
g2<-"envellides"
data_rma<-
data_rma[data_rma@featureData@data$Species.Scientific.Name=="Homo sapiens",]
data_rma<-data_rma[data_rma@featureData@data$Sequence.Type=="miRNA",]
data_rma_ind<-data_rma
exp_rma <- exprs(data_rma_ind)
phenotype_names <- ifelse(str_detect(pData(data_rma)[,comparativa],g1), g1, g2)
annotation_for_heatmap <- data.frame(Phenotype = phenotype_names)
row.names(annotation_for_heatmap) <- pData(data_rma_ind)$Id_de_la_muestra
dists <- as.matrix(dist(t(exp_rma), method = "manhattan"))
rownames(dists) <- pData(data_rma_ind)$Id_de_la_muestra
hmcol <- rev(colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(255))
colnames(dists) <- NULL
diag(dists) <- NA
ann_colors <- list(
Phenotype = c(g1 = "chartreuse4", g2 = "burlywood3")
)
names(ann_colors$Phenotype)<-c(g1,g2)
# png(paste0("resultats/","/PLOTS/heatmap.png"),width = 800,height = 800,res=150)
pheatmap(dists, col = (hmcol),
annotation_col = annotation_for_heatmap,
annotation_colors = ann_colors,
legend = TRUE,
treeheight_row = 10,
legend_breaks = c(min(dists, na.rm = TRUE),
max(dists, na.rm = TRUE)),
legend_labels = (c("small distance", "large distance")),
main = "Heatmap calibrated samples").tmp<-dev.off()
# knitr::include_graphics("resultats/PLOTS/heatmap.png")groups = phenotype_names
f = factor(groups)
design = model.matrix(~ 0 + f)
colnames(design) = c(g1,g2)
data.fit = lmFit(exprs(data_rma_ind),design)
lev <- c(g1, g2)
# Parsing
a<-c(g1,"-",g2)
astr=paste(a, collapse="")
prestr="makeContrasts("
poststr=",levels=design)"
commandstr=paste(prestr,astr,poststr,sep="")
contrast.matrix=eval(parse(text=commandstr))
# colnames(contrast.matrix)<-paste(g1,"-",g2)
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)
tab = topTable(data.fit.eb,coef=1,lfc = log2(1.5),number=Inf,adjust.method="BH",genelist = data_rma_ind@featureData@data )
tab_all = topTable(data.fit.eb,coef=1,number=Inf,adjust.method="BH",genelist = data_rma_ind@featureData@data )
cols<-c("logFC","AveExpr")
cols1<-c("P.Value", "adj.P.Val")
FC_tab<-
tab %>% filter(P.Value<=0.05,abs(logFC)>=log2(1.5)) %>%
dplyr::select(-c(GeneChip.Array,Annotation.Date,Sequence,Sequence.Source,Probe.Set.ID,B,t,Probe.Set.Name,Alignments,Clustered.miRNAs.within.10kb,Genome.Context,Target.Genes)) %>%
mutate(across(cols, round, 3)) %>%
mutate(across(all_of(cols1), format.pval))
datatable_mod4(FC_tab,"FC")tab_all$diffexpressed <- "NO"
# if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP"
tab_all$diffexpressed[tab_all$logFC > log2(1.5) & tab_all$P.Value < 0.05] <- "UP"
# if log2Foldchange < -0.6 and pvalue < 0.05, set as "DOWN"
tab_all$diffexpressed[tab_all$logFC < -log2(1.5) & tab_all$P.Value < 0.05] <- "DOWN"
tab_all$delabel <- NA
tab_all$delabel[tab_all$diffexpressed != "NO"] <- tab_all$Transcript.ID.Array.Design.[tab_all$diffexpressed != "NO"]
# plot adding up all layers we have seen so far
ggplot(data=tab_all, aes(x=logFC, y=-log10(P.Value), col=diffexpressed,
label=delabel))+
geom_point() +
theme_minimal() +
geom_text_repel() +
scale_color_manual(values=c("blue", "black", "red")) +
geom_vline(xintercept=c(-0.6, 0.6), col="red") +
geom_hline(yintercept=-log10(0.05), col="red")gens_SIG<-tab %>% filter(P.Value<=0.05,abs(logFC)>=log2(1.5)) %>%
.$Probe.Set.Name
data_rma_SIG<-data_rma[data_rma@featureData@data$Probe.Set.Name%in%gens_SIG,]
data_rma_SIG_df<-exprs(data_rma_SIG)
rownames(data_rma_SIG_df)<-data_rma_SIG@featureData@data$Transcript.ID.Array.Design.
hmcol <- rev(colorRampPalette(RColorBrewer::brewer.pal(5, "YlOrRd"))(255))
colnames(dists) <- NULL
diag(dists) <- NA
ann_colors <- list(
Phenotype = c(g1 = "chartreuse4", g2 = "burlywood3")
)
names(ann_colors$Phenotype)<-c(g1,g2)
p1<-pheatmap(
data_rma_SIG_df,cutree_rows = 2,cutree_cols = 2,
annotation_col = annotation_for_heatmap,
annotation_colors = ann_colors,scale = "row",
legend = F,
treeheight_row = 50,
legend_breaks = c(min(dists, na.rm = TRUE),
max(dists, na.rm = TRUE)),
legend_labels = (c("small distance", "large distance")),
main = "miRNA FC>1.5 & p<=0.05")
p1library(multiMiR)
mirna_int<-c("hsa-miR-4745-5p")
targets <- get_multimir(mirna = mirna_int, summary = T)## Searching mirecords ...
## Searching mirtarbase ...
## Searching tarbase ...
As a first step, we map the gene names to the STRING database identifiers using the map method.
The map function adds an additional column with STRING identifiers to the dataframe that is passed as first parameter.
library(STRINGdb)
# string_db <- STRINGdb$new(version = "11.5", species = 9606, score_threshold = 200, input_directory="")
# example1_mapped <- string_db$map(targets@summary, "target_symbol", removeUnmappedRows = TRUE )
# save(string_db,file = "string_db")
# save(example1_mapped,file = "example1_mapped")
load("string_db")
load("example1_mapped")
dim(example1_mapped)## [1] 242 10
hits <- example1_mapped$STRING_id
hits_entrez <- example1_mapped$target_entrez
# head(subset(example1_mapped, log10(pvalue) >= -log10(0.01) | abs(logFC) >= 0.5))string_db$plot_network(hits,add_summary = T)Returns a list of clusters of interacting proteins.
# clustersList <- string_db$get_clusters(example1_mapped$STRING_id)
# save(clustersList,file="clustersList")
load("clustersList")
for( i in 1:length(clustersList)){
if (length(clustersList[[i]])>1){
print(string_db$plot_network(clustersList[[i]]))
}}## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
hits_universe_clean<-gsub("9606.","",string_db$proteins$protein_external_id)
STRING_id_clean<-gsub("9606.","",example1_mapped$STRING_id)
enrichmentGO <- string_db$get_enrichment( hits, category = "Process" )
hits_clean<-gsub("9606.","",hits)
go_net <- enrichGO(gene = hits_entrez,
# universe = hits_entrez,
OrgDb = org.Hs.eg.db,
ont = "BP",
# keyType = "ENSEMBLPROT",
pAdjustMethod = "fdr",
pvalueCutoff = 0.05,
qvalueCutoff = 0.2,
readable = TRUE)
go_net_df<-go_net@result %>% filter(p.adjust<=0.05)
datatable_jm(enrichmentGO,column = c("preferredNames","inputGenes"))datatable_jm(go_net_df,column = c("geneID"))go_net_df$ID%in%enrichmentGO$term## [1] TRUE FALSE TRUE TRUE
enrichmentKEGG <- string_db$get_enrichment( hits, category = "KEGG" )
datatable_jm(enrichmentKEGG)